
# This scripts loads the data to conduct analysis for all of the result tables and figures in the paper and appendix
# Inputs: (1) idealpts.csv
#           (2) stan.fit.RData
#          (3) ar5ar6statement_dyad.csv
#          (4) ar5ar6statement_level.csv
#          (5) covariates.csv
# Outputs: No Output files, results displayed in-line

rm(list = setdiff(ls(),"root")) #clear workspace
#----- Set Working Directory
path <- 'replication_final'

setwd(path)

# Load Libraries
library(tidyverse) # version ‘2.0.0’
library(fixest) # version ‘0.12.1’
library(marginaleffects) # version ‘0.22.0’
library(rstan) # version ‘2.32.6’
library("bayesplot") # version ‘1.11.1.9000’
library(modelsummary) # Version ‘2.2.0’
library(caret) # Version  ‘7.0.1’

rstan_options(auto_write = TRUE)



#--------- LOAD DATA 
# Ideal Points
idealpts <- read_csv(paste0(path, '/idealpts.csv'))

# Load stan output
load(file = paste0(path, "/stan.fit.RData"))

# Statement-Dyad Level Data
statementdyad <- read_csv(paste0(path,'/ar5ar6statement_dyad.csv'))

# Statement-Level Data
statements <- read_csv(paste0(path,'/ar5ar6statement_level.csv'))
statements

# Covariates
# load covariates
covariates <- read_csv(paste0(path, "/covariates.csv")) %>% 
                    filter(year==2013) %>% 
                    dplyr::select(iso3c, gdppc,ghgtotal ,gdp,
                                     emissionspercapita, histemissions, unideal, 
                                     pop
                                     ) %>% 
                    mutate(logpop = log(pop), 
                            logghtotal = log(ghgtotal), 
                            loggdppc = log(gdppc),
                            loggdp = log(gdp),
                            emissionintensity = ghgtotal/gdp, 
                            ghgtotal = ghgtotal/1000000, 
                            # unstandandardized variables (for plotting and summary stats)
                            ghgtotal_unstd = ghgtotal*1000,
                            unideal_unstd = unideal, 
                                gdppc_unstd = gdppc,
                                loggdp_unstd = loggdp
                            ) %>% 
                    # standardize all variables
                    mutate_at(vars(c("unideal", "ghgtotal", "gdppc", "loggdp")), scale)
covariates

# join covariates into ideal points
idealpts <- idealpts %>% left_join(covariates, by="iso3c")


# Setting a dictionary 
setFixest_dict(c(unideal = "UN Ideal Point",
                 ghgtotal = "Total GHG emissions",
                  gdppc = "GDP per capita",
                  loggdp = "Log GDP", 
                  x1 = "IPCC Ideal Point", 

                  unideal_diff = "UN Ideal Point Difference", 
                    ghgtotal_diff = "Emissions Difference",
                 uncertaintyscore2 = "Uncertainty",
                 iso3cA = "Country i", 
                 iso3cB = "Country j", 
                 sectionid_unique = "Report Section",
                 statementid = "Statement",
                 x = "Agreement", 
                dyadid = "Dyad",

                ldyadictrade = "log(Trade)", 
                ldist = "log(Inter-Capital Distance)",
                alliance = "Military Alliance",
                contig = "Contiguous",
                comlang_off = "Common Language",
                comcol = "Common Colonizer"

                 ))



#----- MAIN MANUSCRIPT RESULTS -----#

#-- Figure 5: Plot Estimated IPCC Ideal Points
p_fig5<- ggplot(idealpts %>% 
                # missing value for plotting purposes
                mutate(ghgtotal_unstd = replace_na(ghgtotal_unstd, 0.1)))+
    geom_linerange(aes(y=order1,xmin=x1down,xmax=x1up),size=1, alpha = 0.3)+
    geom_point(aes(y=order1,x=x1, size = (ghgtotal_unstd/1000)), color = "black")+
    geom_point(aes(y=order1,x=x1, size = (ghgtotal_unstd/1000)-1, color =unideal)) +
    geom_text(aes(y=order1,x=x1up,label=country),hjust = 0, nudge_x = 0.1)+
    theme_bw()+
    scale_color_distiller(palette = "RdBu", direction = 1,na.value = "grey50")+
    theme(axis.text.y = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size=12),
          axis.ticks.y = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank())+
    labs(size = "Total GHG emissions, \nmillion kt (2013)", color = "UN Ideal Point", x = "IPCC Ideal Point")+
    theme(legend.position = "top", legend.direction = "horizontal")+
    xlim(-1.2,1.5)
p_fig5

#-- Table 2: Regression showing predictors of IPCC ideal points
tab2<- feols(x1 ~ sw0(unideal,ghgtotal, unideal+ghgtotal, unideal+ghgtotal+gdppc+loggdp), 
                data = idealpts  %>% filter(!iso3c %in% c("DEU", "SAU")),
                )
etable(tab2[2:4], 
        se = "hetero",
        digits = 3,
        digits.stats = 3,
        order = c("UN", "Total", "capita", "Log GDP"))


#-- Table 3: Regression showing that ideologically Divergent Dyads less Like to conflict over uncertain statements
tab_3<- feols(x~ csw(unideal_diff+ghgtotal_diff,uncertaintyscore2,uncertaintyscore2:unideal_diff+uncertaintyscore2:ghgtotal_diff,ldyadictrade+ldist+alliance+contig+comlang_off+comcol)|iso3cA + iso3cB + sectionid_unique, 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
dvmean_tab3<- statementdyad$x %>% mean #outcome mean
# NOTE: Table 3 suppresses the coefficients for dyadic controls for brevity
etable(tab_3, 
        extralines = list(
                        "_^Outcome Mean"=rep(dvmean_tab3,4), 
                        "-_Dyadic Controls" = c("x","x","x","$\\checkmark$")
                        ))





#-- Figure 6: Plot Predicted values of agreement by divergence in UN ideal point and uncertainty
# Note, add fixed effects separately for the predictions function
mols<- feols(x~ uncertaintyscore2+unideal_diff+ghgtotal_diff+uncertaintyscore2:unideal_diff+uncertaintyscore2:ghgtotal_diff+ldyadictrade+ldist+alliance+contig+comlang_off+comcol +iso3cA + iso3cB+ factor(sectionid_unique), 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
predictedvalues <- predictions(mols, newdata = datagrid(uncertaintyscore2 = c(1,2,3,4), unideal_diff = 0:5))

# example dyad ideal point differences
sauusa_ideal<- statementdyad %>% filter(dyadid %in% c("SAUUSA")) %>% pull(unideal_diff) %>% unique
deuusa_ideal<- statementdyad %>% filter(dyadid %in% c("DEUUSA")) %>% pull(unideal_diff) %>% unique

# Convert uncertainty score to factor for plotting
predictedvalues<- predictedvalues %>% 
                    mutate(uncertaintyscore2_f = 
                            factor(uncertaintyscore2, 
                                    levels = c(1,2,3,4),
                                    labels = c("1 (Very High Confidence)","2 (High Confidence)","3 (Medium Confidence)","4 (Low or Very Low Confidence)")
                                    )
                    )
dodge <- position_dodge(width=-0.3)  
p_fig6<- ggplot(predictedvalues , 
        aes(x = unideal_diff, y = estimate, group = uncertaintyscore2_f, color = uncertaintyscore2_f)) + 
    geom_point(position=dodge)+
    geom_line(position=dodge)+
    geom_linerange(aes(ymin = conf.low, ymax = conf.high),position=dodge)+
    labs(x = "UN Ideal Point Difference",
         y = "Predicted Agreement", 
        color = "Uncertainty"
         )+
         scale_color_brewer(palette = "RdBu", direction = -1)+
    theme_bw()
# annotate with arrows for example dyad ideal point differences
p_fig6<- p_fig6 + 
        annotate("segment", x = sauusa_ideal, xend = sauusa_ideal, y = -1.4, yend = -1.5, colour = "black", size=0.5, arrow = arrow(length=unit(0.2, "cm")))+
        annotate("segment", x = deuusa_ideal, xend = deuusa_ideal, y = -1.4, yend = -1.5, colour = "black", size=0.5, arrow = arrow(length=unit(0.2, "cm")))+
        annotate("text", x = c(sauusa_ideal,deuusa_ideal), y = c(-1.37,-1.37), 
                label = c("USA-Saudi Arabia", "USA-Germany") , color="black", 
                size=2.5 , angle=0, fontface="bold")

p_fig6



#----- APPENDIX -----#

#-- Section A.1 - Summary Statistics 
# Statement-Level
datasummary((`Uncertainy`=uncertaintyscore2) +
            (`Number of References` = nreferences) +
            (`Statement Length (characters)` = lenstatement)
                ~ 
                N+Mean+SD+Min+Max, data = statements, output = "latex")
# Country-Level
countries <- c(
        statementdyad %>% pull(iso3cA) %>% unique(),
        statementdyad %>% pull(iso3cB) %>% unique(), 
        "EU"
        ) %>% unique()
datasummary((`UN Ideal Point (2007-2013)`=unideal_unstd)+
            (`Total GHG emissions, mt (2013)`=(ghgtotal_unstd) ) +
            (`GDP per Capita, 000s (2013)`=gdppc_unstd/1000) +
            (`Log GDP (2013)` = loggdp_unstd)
                ~ 
                N+Mean+SD+Min+Max, data = covariates %>% filter(iso3c %in% countries), 
                output = "latex")

# Dyad-Level
datasummary(
            (`UN Ideal Point Difference` = unideal_diff)+
                (`Emissions Difference` = ghgtotal_diff)+
                (`log(Trade)` = ldyadictrade)+
                (`log(Inter-Capital Distance)` = ldist)+
                (`Military Alliance` = alliance)+
                (`Contiguous` = contig)+
                (`Common Language` = comlang_off)+
                (`Common Colonizer` = comcol) 
                ~ 
                N+Mean+SD+Min+Max, 
                data = statementdyad %>% group_by(dyadid) %>% 
                                summarize_at(vars(unideal_diff,ghgtotal_diff, ldyadictrade,
                                                ldist, alliance, contig, comlang_off, comcol),
                                         list(~mean(.,na.rm = T))),
                output = "latex")
# Dyad-Statement Level
datasummary((`Agreement`=x) 
                ~ 
                N+Mean+SD+Min+Max, data = statementdyad, 
                output = "latex")


#-- Figure 7: Distribution of Uncertainty in Statements
p_fig7<- statements %>% 
        group_by(document, section, subsection, statementid, statement) %>%
        summarize(uncertaintyscore = mean(uncertaintyscore2, na.rm = T))   %>% 
        ungroup()  %>%
        mutate(Report =  ifelse(str_detect(document, "ar6"), "AR6 (2023)", "AR5 (2014)"), 
               `Working Group` = case_when(
                    str_detect(document, "wg1") ~ "WG1: Physical Science Basis",
                    str_detect(document, "wg2") ~ "WG2: Impacts, Adaptation and Vulnerability",
                    str_detect(document, "wg3") ~ "WG3: Mitigation of Climate Change"
               ))  %>%
        ggplot(aes(x = uncertaintyscore, group = Report, fill = Report)) +
        geom_density(alpha = 0.5) +
        facet_wrap(~`Working Group`, scales = "fixed", nrow = 3) +
        labs(x = "Uncertainty Score", y = "Density") +
        theme_bw() +
        scale_fill_manual(values = c("AR5 (2014)" = "tomato4", "AR6 (2023)" = "deepskyblue4")) 
p_fig7




#-- Figure 8: Traceplots from Stan Model
# traceplots for two countries (16=CHN, 85=USA)
posterior2 <- rstan::extract(stan.fit, inc_warmup = TRUE, permuted = FALSE)
color_scheme_set("mix-blue-pink")
p_fig8 <- mcmc_trace(posterior2,  pars = c("x[16,1]", "x[85,1]"), n_warmup = 1000,
                facet_args = list(nrow = 2, labeller = label_parsed))
p_fig8<- p_fig8 + facet_text(size = 15)
p_fig8

#-- Figure 9: Distribution of Rhats
rhats<- rhat(stan.fit)
rhats<- rhats[str_detect(names(rhats), "x")] 

p_fig9<- ggplot(data =tibble(RHat = rhats))+
    geom_histogram(aes(x = RHat), bins = 20, fill = "grey80", color = "black")+
    theme_minimal()+
    labs(title = " ", x = "Rhat", y = "Frequency")+
        geom_vline(aes(xintercept = mean(rhats)), color = "red")+ 
        annotate("text", x = mean(rhats)+0.0003, y = 13, label = paste0("Mean=",round(mean(rhats),2)), 
                 color = "red", vjust = -1)
p_fig9


#-- Table 6: Regression showing predictors of IPCC ideal points, with more variables 
etable(tab2, 
        style.tex = style.tex(var.title = "\\hline"),
        digits = 3,
        digits.stats = 3,
        se = "hetero",
        order = c("UN", "Total", "capita", "Log GDP"))



#-- Table 7: Regression showing that main effect is robust to inclusion of country-statement FEs
tab7<- feols(x~ csw(unideal_diff+ghgtotal_diff,uncertaintyscore2,uncertaintyscore2:unideal_diff+uncertaintyscore2:ghgtotal_diff,ldyadictrade+ldist+alliance+contig+comlang_off+comcol)|iso3cA^statementid + iso3cB^statementid + sectionid_unique, 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
dvmean_tab7<- statementdyad$x  %>% mean(na.rm = T)
etable(tab7, 
        fitstat="n",
        digits = 3,
        drop = c("ldyadictrade","ldist","alliance","contig","comlang_off","comcol"),
        order = c("times"),
        fixef.group = list("Country x Statement FE"="Country"), 
        extralines = list(
                            "_^Outcome Mean"=rep(dvmean_tab7,4), 
                            "-_Dyadic Controls" = c("x","x","x","$\\checkmark$")
                            ))



#-- Table 8: Regression showing Main result with Genovese (2014) Country Scores
tab8<- feols(x~ csw(genov_diff,uncertaintyscore2,uncertaintyscore2:genov_diff,ldyadictrade+ldist+alliance+contig+comlang_off+comcol)|iso3cA + iso3cB + sectionid_unique, 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
dvmean_tab8<- statementdyad$x  %>% mean(na.rm = T)
setFixest_dict(c(genov_diff = "Genovese (2014) Country Score Diﬀerence"))
etable(tab8, 
        fitstat="n",
        digits = 3,
        drop = c("ldyadictrade","ldist","alliance","contig","comlang_off","comcol"),
        order = c("times"),
        fixef.group = list("Country FE"="Country"), 
        extralines = list(
                            "_^Outcome Mean"=rep(dvmean_tab8,4), 
                            "-_Dyadic Controls" = c("x","x","x","$\\checkmark$")
                            ))


#-- Table 9: Goodness of Fit measures for model in Table 3
# model with just dyadic controls
mod_reference<- feols(x~ ldyadictrade+ldist+alliance+contig+comlang_off+comcol|iso3cA + iso3cB + sectionid_unique, 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
# model in the paper 
mod_actual<- feols(x~ uncertaintyscore2:unideal_diff+uncertaintyscore2:ghgtotal_diff+unideal_diff+ghgtotal_diff+uncertaintyscore2+ldyadictrade+ldist+alliance+contig+comlang_off+comcol|iso3cA + iso3cB + sectionid_unique, 
                    , cluster = ~statementid+dyadid,
                    data = statementdyad )
# gather all the predictions
confusionmat_data <- statementdyad
confusionmat_data$predval_reference <- predict(mod_reference, newdata = statementdyad)
confusionmat_data$predval_actual <- predict(mod_actual, newdata = statementdyad)

# create predicted values
confusionmat_data<- confusionmat_data %>% 
            mutate(
                    predx_reference = case_when(predval_reference>0.5 ~ 100, 
                                    predval_reference< -0.5 ~ -100,
                                    predval_reference >= -0.5 & predval_reference <= 0.5 ~ 0, 
                                    TRUE ~ NA_integer_),
                    predx_actual = case_when(predval_actual>0.5 ~ 100, 
                                    predval_actual< -0.5 ~ -100,
                                    predval_actual >= -0.5 & predval_actual <= 0.5 ~ 0, 
                                    TRUE ~ NA_integer_))
# create confusion matrices
cm_reference<- confusionMatrix(data=factor(confusionmat_data$predx_reference), reference = factor(confusionmat_data$x))
cm_actual<- confusionMatrix(data=factor(confusionmat_data$predx_actual), reference = factor(confusionmat_data$x))

# goodness of fit metrics reported in table 9
cm_actual$byClass[,c("Specificity", "Sensitivity", "Balanced Accuracy")]
cm_reference$byClass[,c("Specificity", "Sensitivity", "Balanced Accuracy")]




#-- Table 10: Likelihood of Intervention by Statement uncertainty  
tab10<- feols(anyintervention~ uncertaintyscore2+ csw(llenstatement)|sw0(ar6+wg,sectionidunique), 
                    , cluster = ~statementid,
                    data = statements  )
dvmean_tab10<- statements$anyintervention %>% mean
etable(tab10, 
        digits = 3,
        digits.stats = 3,
        drop = c("Constant"),
        order = c("Uncertainty"),
        fitstat = c("n","r2"),
        fixef.group = list("Country"="Country"), 
        extralines = list("_^Outcome Mean"=rep(dvmean_tab10,3)
                            ))



#-- Table 11: Number of Interventions by Statement uncertainty  
tab11<- feols(numinterventions~ uncertaintyscore2+ csw(llenstatement)|sw0(ar6+wg,sectionidunique), 
                    , cluster = ~statementid,
                    data = statements  )
dvmean_tab11<- statements$numinterventions %>% mean
etable(tab11, 
        digits = 3,
        digits.stats = 3,
        drop = c("Constant"),
        order = c("Uncertainty"),
        fitstat = c("n","r2"),
        fixef.group = list("Country"="Country"), 
        extralines = list(
                            "_^Outcome Mean"=rep(dvmean_tab11,3)
                            ))



#-- Table 13: Regression showing that statements with more references are less uncertain
tab13<- feols(uncertaintyscore2~ nreferences +csw(llenstatement)|sw0(ar6+wg,sectionidunique), 
                    , cluster = ~statementid,
                    data = statements  )
etable(tab13)
dvmean_tab13<- statements$uncertaintyscore2 %>% mean(na.rm = T)
etable(tab13, 
        digits = 3,
        digits.stats = 3,
        drop = c("Constant"),
        order = c("References"),
        fitstat = c("n","r2"),
        fixef.group = list("Country"="Country"), 
        extralines = list(
                            "_^Outcome Mean"=rep(dvmean_tab13,3)
                            ))





